Workflow

The plague-phylogeography Snakemake pipeline.

Click the nodes to obtain details about each step.

Alignment

Snippy Multi

Snippy Pairwise

Logs

Alignment

Phylogeny

Metadata

General

Phylogenetics

IQTREE

Post-Alignment

Qualimap

Quality Control

MultiQC

Statistics

If the workflow was executed on a cluster or in the cloud, runtimes include the time spent waiting in the queue.

Configuration

Configuration files
File Code
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Snakemake Configuration File

# SQLITE Parameters
# SQLite database file (presumably the one the select commands below are run against).
sqlite_db : "yersinia_pestis_db.sqlite"
# Assembly query: AssemblyFTPGenbank of BioSample rows (left-joined to Assembly)
# whose comment matches KEEP...Assembly...Modern and whose FTP value is non-empty.
sqlite_select_command_asm : SELECT
                              AssemblyFTPGenbank
                            FROM
                              BioSample
                            LEFT JOIN Assembly
                              ON AssemblyBioSampleAccession = BioSampleAccession
                            WHERE
                              (BioSampleComment LIKE '%KEEP%Assembly%Modern%' AND
                               length(AssemblyFTPGenbank) > 0)
# SRA query: BioSample and SRA run accessions of BioSample rows (left-joined to SRA)
# whose comment matches KEEP...SRA...Ancient, excluding SRA rows commented REMOVE.
# String literals use single quotes throughout: SQLite parses double-quoted text
# as an identifier first and only falls back to a string literal as a legacy quirk.
sqlite_select_command_sra : SELECT
                              BioSampleAccession,
                              SRARunAccession
                            FROM
                              BioSample
                            LEFT JOIN SRA
                              ON SRABioSampleAccession = BioSampleAccession
                            WHERE
                              (BioSampleComment LIKE '%KEEP%SRA%Ancient%' AND
                               SRAComment NOT LIKE '%REMOVE%')
# Reference query: AssemblyFTPGenbank of BioSample rows whose comment matches
# Assembly...Modern...Reference.
sqlite_select_command_ref : SELECT
                              AssemblyFTPGenbank
                            FROM
                              BioSample
                            LEFT JOIN Assembly
                              ON AssemblyBioSampleAccession = BioSampleAccession
                            WHERE
                              (BioSampleComment LIKE '%Assembly%Modern%Reference%')

# Dataset size
# Upper limits on the number of assembly and SRA datasets.
# NOTE(review): where these limits are enforced is not visible here — confirm in the rules.
max_datasets_assembly : 1000
max_datasets_sra : 1000
# Read origins; three values are listed: assembly, sra and local.
reads_origin :
  - "assembly"
  - "sra"
  - "local"

# misc filtering
# NOTE(review): units are not stated here — presumably a percent identity and a
# length in base pairs; confirm against the repeat-detection rule.
detect_repeats_threshold : 90
detect_repeats_length : 50

# Reference locus: identifier, display name and start/end coordinates.
# Start and end are quoted, so they load as strings rather than integers.
reference_locus : "AL590842"
reference_locus_name : "chromosome"
reference_locus_start : "0"
reference_locus_end : "4653728"

# Eager param
# NOTE(review): presumably the nf-core/eager pipeline revision plus its read-length
# clipping and bwa aln (-n / -l) settings — confirm against the eager rule.
eager_rev: "2.2.1"
eager_clip_readlength : 35
eager_bwaalnn : 0.01
eager_bwaalnl : 16
organism : "Yersinia pestis"

# Snippy Parameters
# NOTE(review): the depth, quality and fraction values below are presumably forwarded
# to the snippy rules as thresholds — confirm the exact flags they map to.
snippy_ctg_depth : 10
snippy_bam_depth : 3
snippy_base_qual : 20
snippy_map_qual : 30
snippy_min_frac : 0.9
snippy_mask_char : "X"
snippy_missing_data : 5
snippy_snp_density : 10
# Leave this as an empty string to remove singletons; to keep them, use the
# commented-out value below ("--keep-singleton") instead.
snippy_keep_singleton: ""
#snippy_keep_singleton : "--keep-singleton"
# Missing-data values 0 through 10, one list entry each
# (presumably the thresholds that get plotted — TODO confirm units).
snippy_multi_plot_missing_data:
  - 0
  - 1
  - 2
  - 3
  - 4
  - 5
  - 6
  - 7
  - 8
  - 9
  - 10

# IQTREE
# Substitution model argument. The active value fixes the model to K3Pu+F+I;
# the commented-out alternative ("-m MFP") is kept for reference.
#iqtree_model: "-m MFP"
iqtree_model : "-m K3Pu+F+I"
iqtree_seed : "47321424" # keeping it consistent in a config file allows for checkpointing
# Exactly one iqtree_outgroup line should be active; the value is a
# comma-separated list of sample names.
# Outgroup Option #1: Reference
#iqtree_outgroup : "Reference"
# Outgroup Option #2: Basal modern clade
#iqtree_outgroup : "GCA_000323485.1_ASM32348v1_genomic,GCA_000323845.1_ASM32384v1_genomic"
# Outgroup Option #3: Basal ancient clade
iqtree_outgroup : "SAMEA3541826,SAMEA3541827"
# Extra IQ-TREE arguments: 1000 ultrafast bootstrap replicates and 1000 SH-aLRT replicates.
iqtree_other : "--ufboot 1000 --alrt 1000"
# NOTE(review): presumably the number of independent IQ-TREE runs — confirm in the rule.
iqtree_runs : 10

Loading...